{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from lxml import etree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "html_str = \"\"\"\n",
    "<!DOCTYPE html>\n",
    "<html lang=\"en\">\n",
    "<head>\n",
    "    <meta charset=\"UTF-8\">\n",
    "    <title>网页名</title>\n",
    "</head>\n",
    "<body>\n",
    "    <div>\n",
    "        div-text\n",
    "        <span>span-text</span>\n",
    "        <a>a-text</a>\n",
    "        <p>p-text</p>\n",
    "    </div>\n",
    "    <table>\n",
    "        <tr>\n",
    "            <th>Heading</th>\n",
    "            <th>Another Heading</th>\n",
    "        </tr>\n",
    "        <tr>\n",
    "            <td>row 1, cell 1</td>\n",
    "            <td>row 1, cell 2</td>\n",
    "        </tr>\n",
    "        table-text-2\n",
    "    </table>\n",
    "</body>\n",
    "</html>\n",
    "\"\"\"\n",
    "\n",
    "html = etree.HTML(html_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Element html at 0x7ffc4a67a0f0>]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "html.xpath('/html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Element body at 0x7ffc4a6ca0f0>]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "html.xpath('/html/body')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Element div at 0x7ffc4a6ca730>]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "html.xpath('/html/body/div')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Element div at 0x7ffc4a6ca730>]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 子级获取父级\n",
    "## 点表示自身\n",
    "html.xpath('/html/body/div/./.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Element body at 0x7ffc4a6ca0f0>]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 父节点\n",
    "html.xpath('/html/body/div/..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Element table at 0x7ffc4a6cd5a0>]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "html.xpath('/html/body/div/../table')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "## / 和 //\n",
    "## / 代表子级\n",
    "## // 代表子级的多层关系"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<Element td at 0x7ffc4a6cdf00>, <Element td at 0x7ffc4a6d21e0>]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "html.xpath('/html//td')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
